
//# Bit Shifter (Pipelined)

// This module is a building block for application-specific shifts and
// rotates. It synthesizes to LUT logic and Skid Buffers and can be quite
// large if not specialized to a particular situation.

// We can treat the `shift_amount` and the `shift_direction` together as
// a signed magnitude number: the amount is an absolute value, and the
// direction is the sign of the value. Here, a `shift_direction` of `1`,
// meaning a negative number, shifts to the right. Choosing this convention
// for the sign matches the behaviour of a shift when we think about it as
// a multiplication or division by a power of 2:

// * Multiplying N by 8 is equivalent to 2<sup>3</sup>N, which is
// a shift-left by 3 steps.
// * Dividing N by 4 is equivalent to 2<sup>-2</sup>N, which is
// a shift-right by 2 steps.

// Adding together these multiples and fractions generated by the shifts
// enables the creation of small, cheap scaling by constant ratios:

// * 3N = N + 2<sup>1</sup>N
// * 10N = 8N + 2N = 2<sup>3</sup>N + 2<sup>1</sup>N
// * 5N/4 = N + N/4 = N + 2<sup>-2</sup>N
// * etc...

// When the shift values are constant, the shifter reduces to simple rewiring,
// which in turn reduces the above examples to an adder or two each.

// The shifts are internally unsigned and `word_in` and `word_out` are
// extended to the left and right so new bits can be shifted in and current
// bits shifted out without loss, regardless of shift amount or direction,
// which enables the creation of more complex shifts or rotates:

// * Wire the most-significant bit (MSB) of `word_in` to all `word_in_left` inputs and zero to all `word_in_right` inputs to create a signed arithmetic shift.
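// * Wire `word_in` to `word_in_right` (same bit order, MSB to MSB) to create a rotate-left function, or to `word_in_left` to create a rotate-right function. Wiring it to both supports rotation in either direction, with the rotated result on `word_out`.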
// * Feed `word_out_left` and `word_out` to a double-word adder and set the
// shift to +1 (left by 1) as part of the construction of a conditional-add
// multiplier, which multiplies two N-bit words in N cycles, giving a 2N-bit
// result.

`default_nettype none

module Bit_Shifter_Pipelined
#(
    parameter   WORD_WIDTH  = 0,
    parameter   PIPE_DEPTH  = 0
)
(
    input   wire                        clock,
    input   wire                        clear,

    input   wire                        input_valid,
    output  wire                        input_ready,

    input   wire    [WORD_WIDTH-1:0]    word_in_left,
    input   wire    [WORD_WIDTH-1:0]    word_in,
    input   wire    [WORD_WIDTH-1:0]    word_in_right,

    input   wire    [WORD_WIDTH-1:0]    shift_amount,
    input   wire                        shift_direction, // 0/1 -> left/right

    output  wire                        output_valid,
    input   wire                        output_ready,

    output  reg     [WORD_WIDTH-1:0]    word_out_left,
    output  reg     [WORD_WIDTH-1:0]    word_out,
    output  reg     [WORD_WIDTH-1:0]    word_out_right
);

// Shift direction encoding: a `shift_direction` of 0 selects a left shift,
// and any other value selects a right shift.

    localparam  LEFT_SHIFT  = 1'b0;

// The three words (left, centre, right) are handled as a single triple-wide
// word. The pipeline carries that triple-wide word plus the shift amount
// (one word wide) and the single direction bit.

    localparam  TOTAL_WIDTH = 3 * WORD_WIDTH;
    localparam  TOTAL_ZERO  = {TOTAL_WIDTH{1'b0}};
    localparam  PIPE_WIDTH  = (4 * WORD_WIDTH) + 1;

// All three output words start at zero.

    initial begin
        {word_out_left, word_out, word_out_right} = TOTAL_ZERO;
    end

// Pack every input into one bus, pass it through the skid buffer pipeline,
// then unpack it on the far side. The concatenation order is the same on both
// sides: {left, centre, right, amount, direction}, with the LSB on the right.
// The pipeline registers should then retime into the shift logic.

    wire [PIPE_WIDTH-1:0] pipe_in;
    wire [PIPE_WIDTH-1:0] pipe_out;

    assign pipe_in = {word_in_left, word_in, word_in_right, shift_amount, shift_direction};

    Skid_Buffer_Pipeline
    #(
        .WORD_WIDTH (PIPE_WIDTH),
        .PIPE_DEPTH (PIPE_DEPTH)
    )
    bit_shift_pipeline
    (
        // If PIPE_DEPTH is zero, these are unused
        // verilator lint_off UNUSED
        .clock          (clock),
        .clear          (clear),
        // verilator lint_on  UNUSED
        .input_valid    (input_valid),
        .input_ready    (input_ready),
        .input_data     (pipe_in),

        .output_valid   (output_valid),
        .output_ready   (output_ready),
        .output_data    (pipe_out)
    );

    wire [TOTAL_WIDTH-1:0] triple_word;
    wire [WORD_WIDTH-1:0]  amount;
    wire                   direction;

    assign {triple_word, amount, direction} = pipe_out;

// Shift the triple-wide word as an unsigned number in both directions, pick
// the result matching the requested direction, then split it back into the
// three output words. Working on the concatenated word avoids calculating
// vector slices to find where each shifted bit lands.

    reg [TOTAL_WIDTH-1:0] shifted_left  = TOTAL_ZERO;
    reg [TOTAL_WIDTH-1:0] shifted_right = TOTAL_ZERO;

    always @(*) begin
        shifted_left  = triple_word << amount;
        shifted_right = triple_word >> amount;
        {word_out_left, word_out, word_out_right} = (direction == LEFT_SHIFT) ? shifted_left : shifted_right;
    end

endmodule

